package com.dmcb.trade.business.crawlers.article;

import com.dmcb.trade.business.constants.CrawlConstant;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;

/**
 * Article parser for the UC Toutiao (UC头条) platform.
 * <p>
 * Supplies the CSS selectors used to locate the title, publish date, body,
 * cover image and author name of an article page, and overrides
 * {@link #getDateStr(String)} to choose the date element based on the link.
 * <p>
 * Created by Administrator on 2017/5/24 0024.
 */
@Service
public class UCTouTiaoParser extends BaseParser {

    /** Selector for the article title. */
    private static final String TITLE_SELECTOR = "h1.title__title__e436dd806a,div.sm-normal-page h1";

    /** Selector for the publish-date candidates. */
    private static final String DATE_SELECTOR = "div.sm-article-desc>span,span.undefined";

    /** Selector for the article body container. */
    private static final String BODY_SELECTOR = "div.sm-article-content,div.contentbox";

    /** Selector for the cover image. */
    private static final String COVER_SELECTOR = "div.sm-article-content>p>img";

    /** Selector for the author-name candidates. */
    private static final String AUTHOR_NAME_SELECTOR = "div.sm-article-desc>span,h3>p";

    /** Links containing this host fragment take the first date candidate instead of the last. */
    private static final String MP_ARTICLE_HOST = "mparticle.uc.cn";

    /** Platform display name reported by {@link #platform()}. */
    private static final String PLATFORM_NAME = "UC头条";

    /**
     * Site URL for this parser.
     *
     * @return the value of {@code CrawlConstant.UCTOUTIAO_URL}
     */
    @Override
    protected String url() {
        return CrawlConstant.UCTOUTIAO_URL;
    }

    /**
     * Title selector.
     *
     * @return CSS selector matching the article title
     */
    @Override
    protected String titleSelect() {
        return TITLE_SELECTOR;
    }

    /**
     * Publish-date selector.
     *
     * @return CSS selector matching the publish-date candidates
     */
    @Override
    protected String dateSelect() {
        return DATE_SELECTOR;
    }

    /**
     * Body selector.
     *
     * @return CSS selector matching the article body container
     */
    @Override
    protected String bodySelect() {
        return BODY_SELECTOR;
    }

    /**
     * Cover selector.
     *
     * @return CSS selector matching the cover image
     */
    @Override
    protected String coverSelect() {
        return COVER_SELECTOR;
    }

    /**
     * Author-name selector.
     *
     * @return CSS selector matching the author-name candidates
     */
    @Override
    protected String authorNameSelect() {
        return AUTHOR_NAME_SELECTOR;
    }

    /**
     * Whether the date sits in the last matched element.
     *
     * @return always {@code true} for this parser
     */
    @Override
    protected boolean isDateLast() {
        return true;
    }

    /**
     * Fetches the page behind {@code link} and extracts the raw publish-date string.
     * <p>
     * Among the elements matched by {@link #dateSelect()}, the first one is used
     * when the link contains {@code mparticle.uc.cn}; otherwise the last one is used.
     *
     * @param link article URL
     * @return the trimmed inner HTML of the chosen element, or an empty string when
     *         the document cannot be obtained, no element matches, or the content is blank
     */
    @Override
    public String getDateStr(String link) {
        Document page = getDocument(link);
        if (page == null) {
            return "";
        }

        // The two page layouts place the date at opposite ends of the match list.
        Element dateNode = link.contains(MP_ARTICLE_HOST)
                ? page.select(dateSelect()).first()
                : page.select(dateSelect()).last();
        if (dateNode == null) {
            return "";
        }

        String markup = dateNode.html();
        return StringUtils.isBlank(markup) ? "" : markup.trim();
    }

    /**
     * Platform name.
     *
     * @return the display name of the platform handled by this parser
     */
    @Override
    protected String platform() {
        return PLATFORM_NAME;
    }
}

